Let’s import at all of the data that Simon pulled:

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1     ✔ purrr   0.3.2
## ✔ tibble  2.1.3     ✔ dplyr   0.8.3
## ✔ tidyr   1.0.0     ✔ stringr 1.4.0
## ✔ readr   1.3.1     ✔ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(maps)
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
library(ggthemes)

DeltasClean <- read_csv("../data/out/deltas_clean_v2.csv") 
## Parsed with column specification:
## cols(
##   Delta = col_character(),
##   location = col_character(),
##   surface = col_character(),
##   year = col_double(),
##   month = col_double(),
##   ndvi = col_double(),
##   red = col_double(),
##   evi = col_double(),
##   savi = col_double(),
##   gr = col_double(),
##   ndssi = col_double()
## )
DeltaLocations <- read_csv("../data/DeltaLocations.csv")
## Parsed with column specification:
## cols(
##   Deltas = col_character(),
##   Lat = col_double(),
##   Lon = col_double()
## )

As a reminder, for each of the 47 deltas there are measurements of Land & Water areas at Upstream, Downstream and ‘Middle’ locations on the delta. We first lump all the observations together, and look to see which Deltas have many observations:

#counts per delta
DeltaCounts <- count(DeltasClean, Delta)
DeltaCounts

Now, by each month.. where the colorbar represents the number of observations (n) for each month for a given delta:

DeltaObsPerMonth <- count(DeltasClean, Delta, month)

ggplot(DeltaObsPerMonth, aes(y = Delta, x = month, fill=n)) + geom_tile() + 
  scale_x_discrete(limits = c(1:12), breaks = c(1:12)) +
  expand_limits(x = c(1,12)) + 
  scale_fill_gradient( trans = 'log' )

In the above heat map, dark colors (and no color) represent data paucity (and data gaps). Deltas with light colors (e.g., the Parana, Nile, Ebro, Colorado, Brahmani) have lots of data, spread out through the months of the year.

I’ll remove/subset the deltas with sparse coverage (specifically, months with no coverage)….

# need 10 data points per month for NDSSI and NDVI
EnoughObsPerMonth <- DeltasClean %>% ungroup() %>%
  count(Delta, month, surface) %>% 
  group_by(surface) %>%
  filter( n >= 5)

#find deltas missing a given month of observations
DeltaMonthCounts <- EnoughObsPerMonth %>%
  ungroup() %>%
  count(Delta)

# need 12 months of water and land obs, so 24 mo total
EnoughMonths <- DeltaMonthCounts %>%
 filter( n == 24)

CompleteObsDeltas <- pull(EnoughMonths, Delta)

#remove them
DeltasCleaner <- DeltasClean %>%
  filter(Delta %in% CompleteObsDeltas)

#add the real dates in month date format
DeltasCleaner$date <- as.Date(paste(DeltasCleaner$year, DeltasCleaner$month, "01", sep="-"), "%Y-%m-%d")

#remove intermediate data
rm(CompleteObsDeltas, EnoughMonths, DeltaMonthCounts)

#EnoughMonths

and extract some metrics; specifically I will make a timeseries of NDVI and NDSSI for each delta using the mean value for each month.

#take the mean NDVI and NDSSI for each month, for each delta
DeltaMeans <- DeltasCleaner %>%
  group_by(Delta, month, surface) %>%
  summarize(MeanNDVI = mean(ndvi, na.rm = TRUE), MeanNDSSI = mean(ndssi, na.rm = TRUE))

#make a 9 column data frame with:
#delta, 
#max and min NDVI month, 
#NDSSI max and min month, 
#max and min values for both NDVI and NDSSI

#####

DeltaMaxNDVI <- 
  DeltaMeans %>% 
  filter(surface == 'Land')  %>% 
  select (-c(MeanNDSSI)) %>% 
  group_by(Delta) %>% 
  slice(which.max(MeanNDVI)) %>% 
  rename(MaxMeanNDVImonth = month, MaxMeanNDVI = MeanNDVI)

DeltaMaxNDSSI <- 
  DeltaMeans %>% 
  filter(surface == 'Water')  %>% 
  select (-c(MeanNDVI)) %>% 
  group_by(Delta) %>% 
  slice(which.max(MeanNDSSI)) %>% 
  rename(MaxMeanNDSSImonth = month, MaxMeanNDSSI = MeanNDSSI)

DeltaMinNDVI <- 
  DeltaMeans %>% 
  filter(surface == 'Land')  %>% 
  select (-c(MeanNDSSI)) %>% 
  group_by(Delta) %>% 
  slice(which.min(MeanNDVI)) %>% 
  rename(MinMeanNDVImonth = month, MinMeanNDVI = MeanNDVI)

DeltaMinNDSSI <- 
  DeltaMeans %>% 
  filter(surface == 'Water')  %>% 
  select (-c(MeanNDVI)) %>% 
  group_by(Delta) %>% 
  slice(which.min(MeanNDSSI)) %>% 
  rename(MinMeanNDSSImonth = month, MinMeanNDSSI = MeanNDSSI)


#join into 1 dataframe
DeltaMaxMin <- left_join(DeltaMaxNDVI, DeltaMaxNDSSI, by = 'Delta') %>% 
  left_join(.,DeltaMinNDVI, by = 'Delta') %>% 
  left_join(.,DeltaMinNDSSI, by = 'Delta')

#remove intermediate data
rm(DeltaMaxNDVI, DeltaMaxNDSSI, DeltaMinNDSSI,DeltaMinNDVI)

And now we can look at the phase shifts between these two time series (the timeseries of mean NDVI and mean NDSSI). Here are the phase shifts (in month) for each delta:

#compare offset
DeltaMaxMin <- mutate(DeltaMaxMin, 
                      MinOffset = if_else(abs(MinMeanNDVImonth - MinMeanNDSSImonth) > 6, 
                                          12 - abs(MinMeanNDVImonth - MinMeanNDSSImonth),
                                          abs(MinMeanNDVImonth - MinMeanNDSSImonth)),
                      MaxOffset = if_else(abs(MaxMeanNDVImonth - MaxMeanNDSSImonth) > 6, 
                                         12 - abs(MaxMeanNDVImonth - MaxMeanNDSSImonth),
                                          abs(MaxMeanNDVImonth - MaxMeanNDSSImonth)),
                      OffsetDiff = abs(MaxOffset - MinOffset),
                      rangeNDVI = (MaxMeanNDVI - MinMeanNDVI), 
                      rangeNDSSI = (MaxMeanNDSSI - MinMeanNDSSI),
                      halfPeriodNDVI = if_else(abs(MaxMeanNDVImonth - MinMeanNDVImonth) > 6, 
                                          12 - abs(MaxMeanNDVImonth - MinMeanNDVImonth),
                                          abs(MaxMeanNDVImonth - MinMeanNDVImonth)),
                      halfPeriodNDSSI = if_else(abs(MaxMeanNDSSImonth - MinMeanNDSSImonth) > 6, 
                                          12 - abs(MaxMeanNDSSImonth - MinMeanNDSSImonth),
                                          abs(MaxMeanNDSSImonth - MinMeanNDSSImonth)), )

# DeltaMaxMin <- 
#   DeltaMaxMin   %>%
#   select (c(Delta, MinOffset, MaxOffset, OffsetDiff, rangeNDVI, rangeNDSSI,MaxMeanNDSSI,MinMeanNDSSI,MaxMeanNDVI,MinMeanNDVI)) 

DeltaMaxMin
ggplot(DeltaMaxMin, aes(y = Delta, x = MaxOffset)) + geom_point() + 
  scale_x_discrete(limits = c(1:6), breaks = c(1:6)) +
  expand_limits(x = c(0,6))  + 
  ggtitle("MaxOffset")

ggplot(DeltaMaxMin, aes(y = Delta, x = MinOffset)) + geom_point() + 
  scale_x_discrete(limits = c(1:6), breaks = c(1:6)) +
  expand_limits(x = c(0,6))  + 
  ggtitle("MinOffset")

ggplot(DeltaMaxMin, aes(y = Delta, x = OffsetDiff)) + geom_point() + 
  scale_x_discrete(limits = c(1:6), breaks = c(1:6)) +
  expand_limits(x = c(0,6))  + 
  ggtitle("Offset Difference")

Now let’s examine the histograms of all 31 deltas… The months with the greatest mean NDVI, months with gretaest mean NDSSI, the monthly offset, and the skew of the NDSSI and NDVI timeseries.

ggplot(DeltaMaxMin, aes(x = MaxMeanNDVImonth)) + 
  geom_dotplot(binwidth = 1,dotsize = 0.5) + 
  scale_y_continuous(NULL, breaks = NULL) + 
  scale_x_discrete(limits = c(1:12), breaks = c(1:12)) + 
  labs(x = "Month") +
  ggtitle("Month of maximum mean NDVI")

ggplot(DeltaMaxMin, aes(x = MaxMeanNDSSImonth)) + 
  geom_dotplot(binwidth = 1,dotsize = 0.5) + 
  scale_y_continuous(NULL, breaks = NULL) + 
  scale_x_discrete(limits = c(1:12), breaks = c(1:12)) + 
  labs(x = "Month") +
  ggtitle("Month of maximum mean NDSSI")

ggplot(DeltaMaxMin, aes(x = MaxOffset)) + 
  geom_dotplot(binwidth = 1,dotsize = 0.25) + 
  scale_y_continuous(NULL, breaks = NULL) + 
  scale_x_discrete(limits = c(0:6), breaks = c(0:6)) + 
  labs(x = "Months") +
  ggtitle("Months Offset between NDVI and NDSSI")

ggplot(DeltaMaxMin, aes(x = halfPeriodNDVI)) + 
  geom_dotplot(binwidth = 1,dotsize = 0.25) + 
  scale_y_continuous(NULL, breaks = NULL) + 
  scale_x_discrete(limits = c(0:6), breaks = c(0:6)) + 
  labs(x = "Months") +
  ggtitle("half period NDVI ")

ggplot(DeltaMaxMin, aes(x = halfPeriodNDSSI)) + 
  geom_dotplot(binwidth = 1,dotsize = 0.25) + 
  scale_y_continuous(NULL, breaks = NULL) + 
  scale_x_discrete(limits = c(0:6), breaks = c(0:6)) + 
  labs(x = "Months") +
  ggtitle("half period NDSSI")

Ok, so the idea is that peak NDSSI is more effective if it occurs at moderate NDVI, so let’s look at the NDVI value for the months with peak NDSSI.

#extract NDVI value for each delta a the month of max NDSSI value

DeltaNDVIatMaxNDSSI <- DeltaMaxMin %>%
  select(Delta,MaxMeanNDSSImonth)

DeltaMeansToJoin <- DeltaMeans %>%
  filter(surface == 'Land')

DeltaNDVIatMaxNDSSI <- left_join(DeltaNDVIatMaxNDSSI, DeltaMeansToJoin, 
                                 by = c('Delta', 'MaxMeanNDSSImonth' ='month'))
 
DeltaNDVIatMaxNDSSI <- DeltaNDVIatMaxNDSSI %>%
  select (-c(surface, MeanNDSSI))

DeltaNDVIatMaxNDSSI
ggplot(DeltaNDVIatMaxNDSSI, aes(x = MeanNDVI)) + 
  geom_dotplot(binwidth = 0.1, dotsize = 0.25) + 
  scale_y_continuous(NULL, breaks = NULL)  + 
  xlim(0,1) +
  labs(x = "NDVI") +
  ggtitle("NDVI at month of maximum mean NDSSI")

How about the NDVI for months with the lowest NDSSI

#extract NDVI value for each delta a the month of min NDSSI value

DeltaNDVIatMinNDSSI <- DeltaMaxMin %>%
  select(Delta,MinMeanNDSSImonth)

DeltaMeansToJoin <- DeltaMeans %>%
  filter(surface == 'Land')

DeltaNDVIatMinNDSSI <- left_join(DeltaNDVIatMinNDSSI, DeltaMeansToJoin, 
                                 by = c('Delta', 'MinMeanNDSSImonth' ='month'))
 
DeltaNDVIatMinNDSSI <- DeltaNDVIatMinNDSSI %>%
  select (-c(surface, MeanNDSSI))

DeltaNDVIatMaxNDSSI
ggplot(DeltaNDVIatMinNDSSI, aes(x = MeanNDVI)) + 
  geom_dotplot(binwidth = 0.1, dotsize = 0.25) + 
  scale_y_continuous(NULL, breaks = NULL)  + 
  xlim(0,1) +
  labs(x = "NDVI") +
  ggtitle("NDVI at month of min mean NDSSI")

Just to explore the data a bit, here are the phase shifts/offsets against other measured parameters for each delta. The range, max and mean of NDVI and NDSSI is calculated from the timeseries, so it is really the max, min, and range of the monthly means (i.e., the maximum of the means, the minimum of the means, and the range of the mean). Offset is measured in months.

slice1 <- ggplot(DeltaMaxMin, aes(y = rangeNDVI, x = MaxOffset)) + geom_point() 
# + scale_x_discrete(limits = c(1:6), breaks = c(1:6)) + expand_limits(x = c(1,6)) 

slice2 <- ggplot(DeltaMaxMin, aes(y = rangeNDVI, x = rangeNDSSI)) + geom_point() 
slice3 <- ggplot(DeltaMaxMin, aes(y = rangeNDSSI, x = MaxOffset)) + geom_point() 

slice4 <- ggplot(DeltaMaxMin, aes(y = MaxMeanNDVI, x = rangeNDVI)) + geom_point() 
slice5 <- ggplot(DeltaMaxMin, aes(y = MaxMeanNDVI, x = rangeNDSSI)) + geom_point() 
slice6 <- ggplot(DeltaMaxMin, aes(y = MaxMeanNDVI, x = MaxOffset)) + geom_point() 

slice7 <- ggplot(DeltaMaxMin, aes(y = MaxMeanNDSSI, x = MaxMeanNDVI)) + geom_point() 
slice8 <- ggplot(DeltaMaxMin, aes(y = MaxMeanNDSSI, x = rangeNDSSI)) + geom_point() 
slice9 <- ggplot(DeltaMaxMin, aes(y = MaxMeanNDSSI, x = MaxOffset)) + geom_point() 
slice10 <- ggplot(DeltaMaxMin, aes(y = MaxMeanNDSSI, x = rangeNDVI)) + geom_point() 

grid.arrange(slice1, slice2, slice3, slice4, slice5, slice6, ncol=3)

grid.arrange(slice7, slice8, slice9, slice10, ncol=2)

#remove those panels
rm(slice1, slice2, slice3, slice4, slice5, slice6,slice7, slice8, slice9, slice10)

Join Latitude and longitude data

DeltaDatawLocations <- left_join(DeltaMaxMin, DeltaLocations, by = c("Delta" = "Deltas"))

DeltaDatawLocations <- DeltaDatawLocations %>%
  mutate(Absolute_Latitude= abs(Lat))

plot params vs lat

loc1 <- ggplot(DeltaDatawLocations, aes(y = Absolute_Latitude, x = MaxOffset)) + geom_point() 
loc2 <- ggplot(DeltaDatawLocations, aes(y = Absolute_Latitude, x = rangeNDSSI)) + geom_point() 
loc3 <- ggplot(DeltaDatawLocations, aes(y = Absolute_Latitude, x = rangeNDVI)) + geom_point() 
loc4 <- ggplot(DeltaDatawLocations, aes(y = Absolute_Latitude, x = MaxMeanNDVI)) + geom_point() 
loc5 <- ggplot(DeltaDatawLocations, aes(y = Absolute_Latitude, x = MaxMeanNDSSI)) + geom_point() 

grid.arrange(loc1, loc2, loc3, loc4, loc5, ncol=2)

loc1

#ggsave("loc1.pdf", width = 4, height = 4)
loc2

#ggsave("loc2.pdf", width = 4, height = 4)
loc3

#ggsave("loc3.pdf", width = 4, height = 4)

#remove those panels
rm(loc1, loc2, loc3, loc4, loc5)
#find the linear model 
DeltaOffset_lm <- lm( Absolute_Latitude ~ MaxOffset, data = DeltaDatawLocations) 

summary(DeltaOffset_lm)
## 
## Call:
## lm(formula = Absolute_Latitude ~ MaxOffset, data = DeltaDatawLocations)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -21.779  -8.983   0.775   8.377  20.772 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   10.820      4.629   2.337  0.02653 * 
## MaxOffset      4.079      1.157   3.525  0.00143 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.06 on 29 degrees of freedom
## Multiple R-squared:  0.2999, Adjusted R-squared:  0.2758 
## F-statistic: 12.43 on 1 and 29 DF,  p-value: 0.001428
ggplot(DeltaDatawLocations, aes(x = Absolute_Latitude, y = MaxOffset)) + 
  geom_point() +
  geom_smooth(mapping = aes(x = Absolute_Latitude, y = MaxOffset, ), method=lm ) 

Now for some maps of the data maps:

world <- ggplot() +
  borders("world", colour = "gray85", fill = "gray80") +
  theme_map() 

DeltaOffsetMap <- world +
  geom_point(aes(x = Lon, y = Lat, color = MaxOffset),
             data = DeltaDatawLocations, 
             size = 5) + scale_color_gradient( high = "red", low  = "yellow") +
  ggtitle("Offset Between NDVI peak on Land and NDSSI peak in water")

DeltaOffsetMap

#ggsave("DeltaOffsetMap.pdf", width = 6, height = 4)
world <- ggplot() +
  borders("world", colour = "gray85", fill = "gray80") +
  theme_map() 

DeltaNDVIrangeMap <- world +
  geom_point(aes(x = Lon, y = Lat, color = rangeNDVI),
             data = DeltaDatawLocations,
             size = 5) + scale_color_gradient( high = "red", low  = "yellow") + 
  ggtitle("NDVI range")


DeltaNDSSIrangeMap  <- world +
  geom_point(aes(x = Lon, y = Lat, color = rangeNDSSI),
             data = DeltaDatawLocations, 
             size = 5) + scale_color_gradient( high = "red", low = "yellow") + ggtitle("NDSSI range") 

DeltaNDVIrangeMap 

#ggsave("DeltaNDVIrangeMap.pdf", width = 6, height = 4)
DeltaNDSSIrangeMap

#ggsave("DeltaNDSSIrangeMap.pdf", width = 6, height = 4)

Let’s look at some of the timeseries To quantify the water, we use NDSSI. to quantify land, we use NDVI.

First here is the function to make the plots:

DeltaPlotter <- function(DeltaName) {
  #Counts each month
  numVeg <- DeltasCleaner %>%
    select(Delta, surface, month, ndvi) %>%
    filter(Delta == DeltaName & surface == "Land" & !is.na(ndvi)) %>%
    group_by(month) %>%
    summarize(n = n())
  
  numSed <- DeltasCleaner %>%
    select(Delta, surface, month, ndssi) %>%
    filter(Delta == DeltaName &
             surface == "Water" & !is.na(ndssi)) %>%
    group_by(month) %>%
    summarize(n = n())
  
  #Highlight the Maximum and Minimum Month for each delta, NDVI and NDSSI
  
  #LAND
  Veg <-
    ggplot(data = filter(DeltasCleaner, Delta == DeltaName &
                           surface == "Land")) +
    geom_boxplot(aes(x = month, y = ndvi, group = month)) +
    scale_x_discrete(limits = c(1:12), breaks = c(1:12)) +
    expand_limits(x = c(1, 12)) +
    ggtitle(DeltaName) +
    #geom_text(data = numVeg, aes(y = 1.05, x = month, label = n)) +
    geom_boxplot(
      data = filter(
        DeltasCleaner,
        Delta == DeltaName &
          surface == "Land" & month == DeltaMaxMin$MaxMeanNDVImonth[DeltaMaxMin$Delta == DeltaName] 
      ),
      aes(x = month, y = ndvi, group = month),
      fill = "green"
    ) +
    geom_boxplot(
      data = filter(
        DeltasCleaner,
        Delta == DeltaName & 
          surface == "Land" & month == DeltaMaxMin$MinMeanNDVImonth[DeltaMaxMin$Delta ==DeltaName]
      ),
      aes(x = month, y = ndvi, group = month),
      fill = "blue"
    )
  
  
  Sed <-
    ggplot(data = filter(DeltasCleaner, Delta == DeltaName &
                           surface == "Water")) +
    geom_boxplot(aes(x = month, y = ndssi, group = month)) +
    scale_x_discrete(limits = c(1:12), breaks = c(1:12)) +
    expand_limits(x = c(1, 12)) +
    #geom_text(data = numSed, aes(y = 1.05, x = month, label = n)) +
    geom_boxplot(
      data = filter(
        DeltasCleaner,
        Delta == DeltaName &
          surface == "Water" & month == DeltaMaxMin$MaxMeanNDSSImonth[DeltaMaxMin$Delta == DeltaName]
      ),
      aes(x = month, y = ndssi, group = month),
      fill = "green"
    ) +
    geom_boxplot(
      data = filter(
        DeltasCleaner,
        Delta == DeltaName &
          surface == "Water" & month == DeltaMaxMin$MinMeanNDSSImonth[DeltaMaxMin$Delta == DeltaName]
      ),
      aes(x = month, y = ndssi, group = month),
      fill = "blue"
    )
  
  return(grid.arrange(Veg, Sed, nrow = 2))
}

Here is are some examples:

DeltaPlotter("Parana")
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

DeltaPlotter("Magdalena")

DeltaPlotter("Ebro")

DeltaPlotter("Nile")

DeltaPlotter("Senegal")

DeltaPlotter("Orinoco")

DeltaPlotter("Godavari")

DeltaPlotter("Krishna")

Now for some work with GRDC discharge data:

#import the data (monthly means for 21 stations)
DeltasGRDC  <- read_csv("../data/GRDCstations.csv")
## Parsed with column specification:
## cols(
##   Deltas = col_character(),
##   GRDC_Station = col_double(),
##   Time_Series_Length = col_character(),
##   January = col_double(),
##   February = col_double(),
##   March = col_double(),
##   April = col_double(),
##   May = col_double(),
##   June = col_double(),
##   July = col_double(),
##   August = col_double(),
##   September = col_double(),
##   October = col_double(),
##   November = col_double(),
##   December = col_double()
## )
#calculate the mean of the monthly means
DeltasGRDCwMean <- DeltasGRDC %>% 
    rowwise() %>% 
    mutate(MMD=mean(c(January,February,March,April,
                    May,June,July,August,
                    September,October,November,December)))

DeltasGRDCwMean <- DeltasGRDCwMean %>% 
  rowwise() %>% 
  mutate(Range_Discharge = max(c(January,February,March,April,
              May,June,July,August,
              September,October,November,December)) - 
           min(c(January,February,March,April,
              May,June,July,August,
              September,October,November,December))
           )
        

#join to location data:
DeltawLocGRDC <- left_join(DeltaDatawLocations, DeltasGRDCwMean, by = c("Delta" = "Deltas"))


#plot mean of monthly means against NDSSI
ggplot(DeltawLocGRDC, aes(y = Range_Discharge, x = rangeNDSSI)) + geom_point() + scale_y_continuous(trans='log10')
## Warning: Removed 12 rows containing missing values (geom_point).

#ggsave("GRDCNDSSI.pdf", width = 6, height = 4)

ggplot(DeltawLocGRDC, aes(y = Range_Discharge, x = MaxMeanNDSSI)) + geom_point() + scale_y_continuous(trans='log10')
## Warning: Removed 12 rows containing missing values (geom_point).

#rename the months by numbers and tidy the GRDC data
DeltasDischarge <- DeltasGRDC %>%
  rename(Delta = Deltas,"1" = January, "2"= February, "3"= March, "4"= April,
         "5"=May, "6"=June, "7"=July, "8"= August, "9" = September, "10"=October, 
         "11"=November, "12"=December) %>%
  select(Delta, "1" , "2" , "3", "4","5", "6", "7", "8", "9", "10", "11", "12") %>%
  pivot_longer(-Delta, names_to = "month", values_to = "discharge")

#find max GRDC month for each delta
DeltaMaxDischarge <- 
  DeltasDischarge %>% 
  group_by(Delta) %>% 
  slice(which.max(discharge)) %>% 
  rename(MaxDischargeMonth = month, MaxDischarge = discharge) 


DeltaMaxDischarge$MaxDischargeMonth = as.numeric(DeltaMaxDischarge$MaxDischargeMonth)


#join with other delta data
DeltaMaxMinDischarge <- left_join(DeltaMaxMin, DeltaMaxDischarge, by = 'Delta')

#calculate offset
DeltaMaxMinDischarge <- DeltaMaxMinDischarge %>%
  mutate( DissOff = if_else(abs(MaxDischargeMonth - MaxMeanNDSSImonth) > 6,
                            12 - abs(MaxDischargeMonth - MaxMeanNDSSImonth),
                            abs(MaxDischargeMonth - MaxMeanNDSSImonth))
          )

#Compare offset with NDSSI (deltamaxmin$maxmeanNDSSImonth)
ggplot(DeltaMaxMinDischarge, aes(y = Delta, x = DissOff)) + geom_point() + 
  scale_x_discrete(limits = c(1:6), breaks = c(1:6)) +
  expand_limits(x = c(0,6))  + 
  ggtitle("DisOffset")
## Warning: Removed 11 rows containing missing values (geom_point).

Look at GRDC data by latitude:

#join lat data
DeltaDatawLocations <- left_join(DeltaMaxMinDischarge, DeltaLocations, by = c("Delta" = "Deltas"))

DeltaDatawLocations <- DeltaDatawLocations %>%
  mutate(Absolute_Latitude= abs(Lat))

# plot offset on graph by lat
ggplot(DeltaDatawLocations, aes(x = Absolute_Latitude, y = DissOff)) + geom_point() +
  scale_color_gradient(low = "yellow", high = "red", na.value = NA) 
## Warning: Removed 11 rows containing missing values (geom_point).

  #+ geom_smooth(mapping = aes(x = Absolute_Latitude, y = DissOff, ), method=lm ) 

#ggsave("DisOffset.pdf", width = 6, height = 4)

And now on a map:

#plot offset on map
DeltaDisOffsetMap <- world +
  geom_point(aes(x = Lon, y = Lat, color = DissOff),
             data = DeltaDatawLocations, 
             size = 5) + scale_color_gradient( high = "red", low  = "yellow") +
  ggtitle("Offset Between GRDC discharge peak and NDSSI peak in water")

DeltaDisOffsetMap

#ggsave("DeltaOffsetMap.pdf", width = 6, height = 4)